#Scopus Database Query By Journal Article
library(XML)
library(proto)
library(magrittr)
library(broom)
library(plyr)
library(dplyr)
library(ggplot2)
library(reshape2)
library(httr)
library(stringr)
library(chron)
library(vegan)
library(knitr)
library(bipartite)
library(sna)
library(igraph)
library(knitr)
library(gridExtra)
library(GGally)
library(stringr)
library(networkD3)
# Load the project's helper functions.
# NOTE(review): "Funtions.R" looks like a typo for "Functions.R" -- confirm the
# file name on disk before renaming anything.
source("Funtions.R")

# Global knitr chunk options: echo code, no caching, centred 14 x 12 figures,
# and suppress warnings/messages in the rendered report.
opts_chunk$set(
  echo = TRUE,
  cache = FALSE,
  fig.align = 'center',
  fig.height = 12,
  fig.width = 14,
  warning = FALSE,
  message = FALSE
)
Read in the data, which was processed by ByJournal.R.
# Journal lookup table; the first CSV column becomes the row names.
journaldf <- read.csv(
  "C:/Users/Ben/Dropbox/FacultyNetwork/JournalID.csv",
  row.names = 1
)

# Parsed records. fill = TRUE lets short (malformed) lines be read instead of
# aborting; such lines are filtered out later in the script.
tocompare <- read.table(
  "C:/Users/Ben/Dropbox/FacultyNetwork/ParsedDataID.csv",
  row.names = NULL,
  header = TRUE,
  sep = ",",
  fill = TRUE
)

# Peek at the last few records.
tail(tocompare)
## Journal DOI Order Author
## 4763443 Zoologica Scripta SCOPUS_ID:84927938292 3 7005747774
## 4763444 Zoologica Scripta SCOPUS_ID:84927938292 4 8692718700
## 4763445 Zoologica Scripta SCOPUS_ID:84927938292 2 56596457600
## 4763446 Zoologica Scripta SCOPUS_ID:84927938292 1 54890209300
## 4763447 Zoologica Scripta SCOPUS_ID:84927938292 6 6701851552
## 4763448 Zoologica Scripta SCOPUS_ID:84927938292 5 6506098243
## Affiliation
## 4763443 Universita degli Studi del Molise
## 4763444 Universita di Pisa
## 4763445 Museo Civico di Storia Naturale di Trieste
## 4763446 Dipartimento di Biologia Univerisità degli Studi di Napoli Federico II
## 4763447 Universita degli Studi di Napoli Federico II
## 4763448 University of Belgrade
## Citations Year Class h5.index h5.median
## 4763443 0 2015 Zoology 23 27
## 4763444 0 2015 Zoology 23 27
## 4763445 0 2015 Zoology 23 27
## 4763446 0 2015 Zoology 23 27
## 4763447 0 2015 Zoology 23 27
## 4763448 0 2015 Zoology 23 27
# Size of the raw table (rows, columns).
dim(tocompare)
## [1] 4763448 10
# Exclude the "Biogeneral" class for now. %in% (rather than !=) treats rows
# with a missing Class as "not Biogeneral", so they are kept.
tocompare <- tocompare[!(tocompare$Class %in% "Biogeneral"), ]
Basic data cleaning. We only want records from active authors: those with at least 4 publications (more than 3 rows) in the entire record.
# --- Filter to active authors ---------------------------------------------
# Authors with more than 3 rows are kept. The count is taken before duplicate
# rows are removed below.
keep <- names(which(table(tocompare$Author) > 3))
tocompare <- droplevels(tocompare[tocompare$Author %in% keep, ])

# Remove exact duplicate rows.
tocompare <- unique(tocompare)

# Drop stray header rows that were read as data (Journal column holding the
# literal column name "DOI").
tocompare <- droplevels(tocompare[!(tocompare$Journal %in% "DOI"), ])
dim(tocompare)
## [1] 2715847 10

# Valid class names; rows whose Class is not listed are treated as malformed.
j_class <- read.csv("Class.csv", row.names = 1)
tocompare <- droplevels(tocompare[tocompare$Class %in% j_class$Class, ])

# Malformed lines: a SCOPUS id has ended up in the Journal column.
# which() also discards rows where the test is NA.
tocompare <- tocompare[which(!str_detect(tocompare$Journal, "SCOPUS")), ]
dim(tocompare)
## [1] 2715830 10

# Same check for the h5.index column.
tocompare <- droplevels(
  tocompare[which(!str_detect(tocompare$h5.index, "SCOPUS")), ]
)
dim(tocompare)
## [1] 2715830 10
Basic descriptive stats on results
# Number of distinct journals
paste("Number of Journals:", length(unique(tocompare$Journal)))
## [1] "Number of Journals: 578"
# Number of distinct authors
paste("Number of Authors:", length(unique(tocompare$Author)))
## [1] "Number of Authors: 327381"
# Number of distinct papers (the DOI column holds the Scopus record id)
paste("Number of Papers:", length(unique(tocompare$DOI)))
## [1] "Number of Papers: 1353142"
# Row counts per journal, sorted ascending so tail() shows the largest.
ta <- sort(table(tocompare$Journal))
print("Most published journals")
## [1] "Most published journals"
tail(ta)
##
## Journal Of Virology Oncogene
## 37067 41152
## Atmospheric Environment Nucleic Acids Research
## 46618 55854
## Molecular And Cellular Biology Journal Of Biological Chemistry
## 67077 77864
How many papers from each discipline over time?
# Count distinct papers per class and year.
class_year <- tocompare %>%
  group_by(Class, Year) %>%
  summarize(Papers = length(unique(DOI)))

# One panel per class with its own y-axis; Year is treated as discrete.
ggplot(
  class_year,
  aes(x = as.factor(Year), y = Papers, col = Class, group = Class)
) +
  geom_line() +
  theme_bw() +
  facet_wrap(~Class, scales = "free_y", ncol = 4) +
  labs(x = "Year")

# Saves the plot drawn just above.
ggsave("Figures/Papers_Year.svg", dpi = 300)
Create a matrix of authors in each class - analogous to the site-by-species matrix used in ecology.
# Author x Class contingency table (rows = authors, columns = classes; each
# cell is the number of rows for that author in that class), as a data frame.
siteXspp <- table(tocompare$Author, tocompare$Class) %>%
  as.data.frame.array()
dim(siteXspp)
## [1] 327381 38
Use the abundance of papers by each author to calculate niche overlap between classes (overlap = 1 - Horn distance).
Low overlap = 0; high overlap = 1.
# Pairwise similarity between disciplines: transpose so classes are the rows
# that vegdist() compares, then convert the Horn dissimilarity to an overlap.
topics <- 1 - as.matrix(vegdist(t(siteXspp), method = "horn"))
Visualize interactions.
# Build an undirected, weighted graph from the class-by-class overlap matrix,
# then simplify it (igraph: removes loop and multiple edges).
g <- simplify(graph.adjacency(topics, mode = "undirected", weighted = TRUE))

# Vertex labels, degree and colours.
V(g)$label <- V(g)$name
V(g)$degree <- degree(g)
V(g)$label.color <- rgb(0, 0, .2, .8)
V(g)$frame.color <- NA

# Edge weights rescaled by the maximum weight.
egam <- E(g)$weight / max(E(g)$weight)

# NOTE(review): this green/alpha colouring is immediately overwritten by the
# blue-to-red ramp assigned two statements below.
E(g)$color <- rgb(0, 1, 0, alpha = egam, maxColorValue = 1)
ramp <- colorRamp(c("blue", "red"), alpha = TRUE)
E(g)$color <- apply(
  ramp(E(g)$weight),
  1,
  function(px) rgb(px[1] / 255, px[2] / 255, px[3] / 255, alpha = TRUE)
)

# Working copy without the very weak edges (weight < 0.05).
g.copy <- delete.edges(g, which(E(g)$weight < .05))

# Edge width: weight relative to the strongest remaining edge, scaled to 0-8.
width <- (E(g.copy)$weight / max(E(g.copy)$weight)) * 8

# Label size grows with vertex degree (range 0.5 to 1).
V(g.copy)$degree <- degree(g.copy)
V(g.copy)$label.cex <- V(g.copy)$degree / max(V(g.copy)$degree) * .5 + .5

# Vertex size from the number of rows per class (range 4 to 7).
# NOTE(review): relies on table(tocompare$Class) being in the same order as
# the graph's vertices -- confirm.
size <- round(
  as.numeric(table(tocompare$Class)) / max(table(tocompare$Class)) * 3
) + 4

# Default layout
plot.igraph(g.copy, vertex.size = size, edge.width = width)

# Try a couple of different layouts.
# layout1: Fruchterman-Reingold
layout1 <- layout.fruchterman.reingold(
  g.copy,
  niter = 1000,
  area = vcount(g.copy)^2.3
)
plot(
  g.copy,
  edge.width = width,
  vertex.size = size,
  layout = layout1,
  vertex.color = "black"
)

# layout2: circular Reingold-Tilford
layout2 <- layout.reingold.tilford(g.copy, circular = TRUE)
plot(
  g.copy,
  edge.width = width,
  vertex.size = size,
  layout = layout2,
  vertex.color = "black"
)

# Save the full network (layout1) as SVG ...
svg(filename = "Figures/Overall_NetworkIgraph.svg")
plot(
  g.copy,
  edge.width = width,
  vertex.size = size,
  layout = layout1,
  vertex.color = "black"
)
dev.off()
## pdf
## 2
# ... and as a 10 x 12 inch, 300 dpi JPEG.
jpeg(
  filename = "Figures/Overall_NetworkIgraph.jpeg",
  res = 300,
  height = 12,
  width = 10,
  units = "in"
)
plot(
  g.copy,
  edge.width = width,
  vertex.size = size,
  layout = layout1,
  vertex.color = "black"
)
dev.off()
## pdf
## 2
View as a dendrogram
# Walktrap community detection on the full graph g (not the pruned g.copy),
# shown as a hierarchical clustering tree.
wt <- walktrap.community(g, modularity = TRUE)
dend <- as.dendrogram(wt, use.modularity = TRUE)
plot(as.hclust(dend))
We are interested in the centrality, modularity and compartmentalization of the biological sciences.
# Per-class importance measures on the pruned network g.copy.
between_class <- betweenness(g.copy)
degree_class <- degree(g.copy)
closeness_class <- closeness(g.copy)
eigenV <- evcent(g.copy)$vector

# One row per class; class names are taken from the betweenness vector.
vdat <- data.frame(
  Class = names(between_class),
  Between = between_class,
  Degree = degree_class,
  Closeness = closeness_class,
  Eigen = eigenV
)
Correlation among importance measures.
# Scatterplot matrix of the four metrics (first column, Class, is dropped).
ggpairs(vdat[, -1])

# Reorder the Class levels by betweenness, then degree, so later plots list
# classes in that order.
vdat$Class <- factor(
  vdat$Class,
  levels = vdat$Class[order(vdat$Between, vdat$Degree)]
)
Top actors for each statistic
# Long format: one row per (Class, Metric) pair.
mdat <- setNames(melt(vdat), c("Class", "Metric", "Score"))

# Z-score each metric across classes, then order rows by raw score.
dt <- mdat %>%
  group_by(Metric) %>%
  mutate(Svalue = as.numeric(scale(Score))) %>%
  group_by(Metric, Class) %>%
  arrange(Score)

# Horizontal bars, one panel per metric.
# NOTE(review): geom_bar_horz() is not part of ggplot2; presumably it comes
# from Funtions.R or another attached package -- confirm.
ggplot(dt, aes(y = Class, x = Svalue)) +
  geom_bar_horz(stat = "identity", position = "identity") +
  facet_grid(. ~ Metric) +
  labs(x = "Z-score")
ggsave("Figures/OveralMetrics.jpg", dpi = 300, height = 10, width = 12)
ggsave("Figures/OveralMetrics.svg", dpi = 300)

# Mean raw score of each metric.
mdat %>%
  group_by(Metric) %>%
  summarize(mean = mean(Score))
## Source: local data frame [4 x 2]
##
## Metric mean
## 1 Between 42.31579
## 2 Degree 4.68421
## 3 Closeness 0.01969
## 4 Eigen 0.17660
# Density of the pruned network, and the mean weight of its remaining edges.
overall.density <- graph.density(g.copy)
mean(E(g.copy)$weight)
## [1] 0.1409
# Split the records into 4-year windows (breaks at 1995, 1999, ..., 2015),
# each labelled with its lower break.
m <- seq(1995, 2015, by = 4)

# Labels are all breaks except the last. The previous `m[1:length(m)-1]`
# parsed as `m[(1:length(m)) - 1]`, i.e. `m[0:5]`, and only produced the right
# labels because R silently drops index 0.
# NOTE(review): cut() uses right-closed intervals and include.lowest = FALSE by
# default, so rows with Year == 1995 get Time = NA and are dropped by split();
# the window labelled "1995" covers 1996-1999. Confirm this is intended.
tocompare$Time <- cut(
  as.numeric(as.character(tocompare$Year)),
  breaks = m,
  labels = m[-length(m)]
)

# One data frame per time window.
yearcompare <- split(tocompare, tocompare$Time)
# Degree distribution for every time window.
# CalcDD() is not defined in this file (presumably in Funtions.R -- confirm).
dd <- melt(sapply(yearcompare, CalcDD))

ggplot(dd, aes(x = Var1, y = value, col = as.factor(L1))) +
  geom_line(size = .5) +
  geom_point(size = 4, aes(group = as.factor(L1))) +
  theme_bw() +
  xlab("Node Degree") +
  ggtitle("Degree Distribution") +
  labs(col = "Year")

# Network statistics per time window.
# calcN() is not defined in this file either (presumably Funtions.R).
yearstats <- lapply(yearcompare, calcN)
head(yearstats)
## $`1995`
## Class
## Agronomy Crop Science Agronomy Crop Science
## Animal Behavior Animal Behavior
## Animal Husbandry Animal Husbandry
## Atmospheric Sciences Atmospheric Sciences
## Biochemistry Biochemistry
## Biodiversity & Conservation Biology Biodiversity & Conservation Biology
## Bioinformatics Bioinformatics
## Biophysics Biophysics
## Biotechnology Biotechnology
## Birds Birds
## Botany Botany
## Cell Biology Cell Biology
## Developmental Biology & Embryology Developmental Biology & Embryology
## Ecology Ecology
## Environmental & Geological Engineering Environmental & Geological Engineering
## Environmental Sciences Environmental Sciences
## Evolutionary Biology Evolutionary Biology
## Food Science & Technology Food Science & Technology
## Forests & Forestry Forests & Forestry
## Geochemistry & Mineralogy Geochemistry & Mineralogy
## Geology Geology
## Hydrology Hydrology
## Insects & Arthropods Insects & Arthropods
## Marine Sciences Fisheries Marine Sciences Fisheries
## Microbiology Microbiology
## Molecular Biology Molecular Biology
## Mycology Mycology
## Oceanography Oceanography
## Paleontology Paleontology
## Pest Control & Pesticides Pest Control & Pesticides
## Plant Pathology Plant Pathology
## Proteomics & Peptides Proteomics & Peptides
## Soil Sciences Soil Sciences
## Sustainable Development Sustainable Development
## Sustainable Energy Sustainable Energy
## Virology Virology
## Wood Science & Technology Wood Science & Technology
## Zoology Zoology
## Between Degree Closeness Eigen
## Agronomy Crop Science 0 3 0.0025741 0.0000000
## Animal Behavior 31 5 0.0025808 0.0000000
## Animal Husbandry 19 2 0.0025702 0.0007167
## Atmospheric Sciences 4 3 0.0025791 0.0002468
## Biochemistry 0 2 0.0007514 0.7558238
## Biodiversity & Conservation Biology 37 5 0.0025858 0.0000000
## Bioinformatics 0 1 0.0025717 0.0001438
## Biophysics 0 1 0.0007309 0.0005176
## Biotechnology 57 2 0.0025817 0.0009793
## Birds 0 4 0.0025791 0.0000000
## Botany 73 4 0.0025918 0.0000000
## Cell Biology 0 2 0.0007513 0.9433370
## Developmental Biology & Embryology 0 0 0.0007112 0.0000000
## Ecology 86 9 0.0025902 0.0000000
## Environmental & Geological Engineering 0 1 0.0025784 0.0002946
## Environmental Sciences 158 6 0.0025919 0.0007188
## Evolutionary Biology 29 5 0.0025821 0.0000000
## Food Science & Technology 20 2 0.0025675 0.0010704
## Forests & Forestry 95 3 0.0025918 0.0000000
## Geochemistry & Mineralogy 0 1 0.0025204 0.0003445
## Geology 51 3 0.0025587 0.0003147
## Hydrology 22 2 0.0025855 0.0006810
## Insects & Arthropods 0 2 0.0025793 0.0000000
## Marine Sciences Fisheries 0 3 0.0025784 0.0000000
## Microbiology 50 3 0.0025716 0.0015191
## Molecular Biology 0 2 0.0007512 1.0000000
## Mycology 0 0 0.0007112 0.0000000
## Oceanography 72 4 0.0025809 0.0000000
## Paleontology 0 1 0.0025263 0.0003534
## Pest Control & Pesticides 0 2 0.0025569 0.0000000
## Plant Pathology 25 3 0.0025782 0.0000000
## Proteomics & Peptides 0 1 0.0007309 0.0005176
## Soil Sciences 161 5 0.0025941 0.0007392
## Sustainable Development 0 0 0.0007112 0.0000000
## Sustainable Energy 0 0 0.0007112 0.0000000
## Virology 0 1 0.0025617 0.0005733
## Wood Science & Technology 0 0 0.0007112 0.0000000
## Zoology 0 5 0.0025780 0.0000000
##
## $`1999`
## Class
## Agronomy Crop Science Agronomy Crop Science
## Animal Behavior Animal Behavior
## Animal Husbandry Animal Husbandry
## Atmospheric Sciences Atmospheric Sciences
## Biochemistry Biochemistry
## Biodiversity & Conservation Biology Biodiversity & Conservation Biology
## Bioinformatics Bioinformatics
## Biophysics Biophysics
## Biotechnology Biotechnology
## Birds Birds
## Botany Botany
## Cell Biology Cell Biology
## Developmental Biology & Embryology Developmental Biology & Embryology
## Ecology Ecology
## Environmental & Geological Engineering Environmental & Geological Engineering
## Environmental Sciences Environmental Sciences
## Evolutionary Biology Evolutionary Biology
## Food Science & Technology Food Science & Technology
## Forests & Forestry Forests & Forestry
## Geochemistry & Mineralogy Geochemistry & Mineralogy
## Geology Geology
## Hydrology Hydrology
## Insects & Arthropods Insects & Arthropods
## Marine Sciences Fisheries Marine Sciences Fisheries
## Microbiology Microbiology
## Molecular Biology Molecular Biology
## Mycology Mycology
## Oceanography Oceanography
## Paleontology Paleontology
## Pest Control & Pesticides Pest Control & Pesticides
## Plant Pathology Plant Pathology
## Proteomics & Peptides Proteomics & Peptides
## Soil Sciences Soil Sciences
## Sustainable Development Sustainable Development
## Sustainable Energy Sustainable Energy
## Virology Virology
## Wood Science & Technology Wood Science & Technology
## Zoology Zoology
## Between Degree Closeness Eigen
## Agronomy Crop Science 0 3 0.0023426 0.0009045
## Animal Behavior 46 7 0.0023508 0.0000000
## Animal Husbandry 24 2 0.0023420 0.0020929
## Atmospheric Sciences 4 3 0.0023441 0.0016098
## Biochemistry 6 3 0.0008215 0.7369261
## Biodiversity & Conservation Biology 41 6 0.0023530 0.0000000
## Bioinformatics 0 0 0.0007112 0.0000000
## Biophysics 0 1 0.0008211 0.0055580
## Biotechnology 42 2 0.0023487 0.0027356
## Birds 0 4 0.0023490 0.0000000
## Botany 109 5 0.0023588 0.0009055
## Cell Biology 3 3 0.0008214 0.9366376
## Developmental Biology & Embryology 1 2 0.0008213 0.1619996
## Ecology 70 9 0.0023560 0.0000000
## Environmental & Geological Engineering 0 1 0.0023477 0.0009099
## Environmental Sciences 144 6 0.0023576 0.0003393
## Evolutionary Biology 0 4 0.0023465 0.0000000
## Food Science & Technology 15 2 0.0023389 0.0027422
## Forests & Forestry 43 4 0.0023580 0.0000000
## Geochemistry & Mineralogy 0 1 0.0022936 0.0005489
## Geology 49 3 0.0023268 0.0000000
## Hydrology 14 2 0.0023504 0.0012070
## Insects & Arthropods 3 3 0.0023514 0.0000000
## Marine Sciences Fisheries 0 3 0.0023479 0.0000000
## Microbiology 37 3 0.0023417 0.0036560
## Molecular Biology 0 3 0.0008212 1.0000000
## Mycology 0 0 0.0007112 0.0000000
## Oceanography 76 4 0.0023490 0.0000000
## Paleontology 0 1 0.0022945 0.0005611
## Pest Control & Pesticides 0 2 0.0023276 0.0000000
## Plant Pathology 21 3 0.0023496 0.0000000
## Proteomics & Peptides 4 2 0.0008214 0.0404448
## Soil Sciences 135 5 0.0023594 0.0008388
## Sustainable Development 0 0 0.0007112 0.0000000
## Sustainable Energy 0 0 0.0007112 0.0000000
## Virology 0 1 0.0023328 0.0015757
## Wood Science & Technology 0 0 0.0007112 0.0000000
## Zoology 0 5 0.0023426 0.0000000
##
## $`2003`
## Class
## Agronomy Crop Science Agronomy Crop Science
## Animal Behavior Animal Behavior
## Animal Husbandry Animal Husbandry
## Atmospheric Sciences Atmospheric Sciences
## Biochemistry Biochemistry
## Biodiversity & Conservation Biology Biodiversity & Conservation Biology
## Bioinformatics Bioinformatics
## Biophysics Biophysics
## Biotechnology Biotechnology
## Birds Birds
## Botany Botany
## Cell Biology Cell Biology
## Developmental Biology & Embryology Developmental Biology & Embryology
## Ecology Ecology
## Environmental & Geological Engineering Environmental & Geological Engineering
## Environmental Sciences Environmental Sciences
## Evolutionary Biology Evolutionary Biology
## Food Science & Technology Food Science & Technology
## Forests & Forestry Forests & Forestry
## Geochemistry & Mineralogy Geochemistry & Mineralogy
## Geology Geology
## Hydrology Hydrology
## Insects & Arthropods Insects & Arthropods
## Marine Sciences Fisheries Marine Sciences Fisheries
## Microbiology Microbiology
## Molecular Biology Molecular Biology
## Mycology Mycology
## Oceanography Oceanography
## Paleontology Paleontology
## Pest Control & Pesticides Pest Control & Pesticides
## Plant Pathology Plant Pathology
## Proteomics & Peptides Proteomics & Peptides
## Soil Sciences Soil Sciences
## Sustainable Development Sustainable Development
## Sustainable Energy Sustainable Energy
## Virology Virology
## Wood Science & Technology Wood Science & Technology
## Zoology Zoology
## Between Degree Closeness Eigen
## Agronomy Crop Science 32 4 0.0109402 5.414e-04
## Animal Behavior 141 7 0.0114390 0.000e+00
## Animal Husbandry 121 2 0.0113078 7.424e-04
## Atmospheric Sciences 4 3 0.0109475 8.567e-04
## Biochemistry 156 4 0.0110898 7.857e-01
## Biodiversity & Conservation Biology 30 6 0.0114403 0.000e+00
## Bioinformatics 0 1 0.0111388 0.000e+00
## Biophysics 0 1 0.0105071 4.070e-03
## Biotechnology 104 2 0.0113332 5.660e-03
## Birds 0 4 0.0112244 0.000e+00
## Botany 154 5 0.0114464 0.000e+00
## Cell Biology 33 3 0.0099734 9.147e-01
## Developmental Biology & Embryology 1 2 0.0095752 1.940e-01
## Ecology 66 9 0.0114530 0.000e+00
## Environmental & Geological Engineering 0 1 0.0111837 3.110e-04
## Environmental Sciences 237 7 0.0115436 9.633e-04
## Evolutionary Biology 94 6 0.0114024 0.000e+00
## Food Science & Technology 113 2 0.0112074 3.307e-03
## Forests & Forestry 155 4 0.0115144 0.000e+00
## Geochemistry & Mineralogy 0 1 0.0096766 3.206e-04
## Geology 67 3 0.0107169 1.609e-04
## Hydrology 16 2 0.0110891 2.472e-04
## Insects & Arthropods 30 4 0.0114050 0.000e+00
## Marine Sciences Fisheries 0 3 0.0111472 0.000e+00
## Microbiology 212 4 0.0112421 3.852e-02
## Molecular Biology 0 3 0.0093460 1.000e+00
## Mycology 0 1 0.0108444 9.472e-05
## Oceanography 110 4 0.0112477 0.000e+00
## Paleontology 0 1 0.0097645 2.992e-04
## Pest Control & Pesticides 0 3 0.0107502 1.681e-04
## Plant Pathology 34 4 0.0111202 2.352e-04
## Proteomics & Peptides 34 2 0.0108018 5.481e-02
## Soil Sciences 171 4 0.0115333 4.008e-04
## Sustainable Development 0 1 0.0112691 2.574e-04
## Sustainable Energy 0 0 0.0007112 0.000e+00
## Virology 0 1 0.0108980 3.245e-03
## Wood Science & Technology 0 0 0.0007112 0.000e+00
## Zoology 1 6 0.0111714 0.000e+00
##
## $`2007`
## Class
## Agronomy Crop Science Agronomy Crop Science
## Animal Behavior Animal Behavior
## Animal Husbandry Animal Husbandry
## Atmospheric Sciences Atmospheric Sciences
## Biochemistry Biochemistry
## Biodiversity & Conservation Biology Biodiversity & Conservation Biology
## Bioinformatics Bioinformatics
## Biophysics Biophysics
## Biotechnology Biotechnology
## Birds Birds
## Botany Botany
## Cell Biology Cell Biology
## Developmental Biology & Embryology Developmental Biology & Embryology
## Ecology Ecology
## Environmental & Geological Engineering Environmental & Geological Engineering
## Environmental Sciences Environmental Sciences
## Evolutionary Biology Evolutionary Biology
## Food Science & Technology Food Science & Technology
## Forests & Forestry Forests & Forestry
## Geochemistry & Mineralogy Geochemistry & Mineralogy
## Geology Geology
## Hydrology Hydrology
## Insects & Arthropods Insects & Arthropods
## Marine Sciences Fisheries Marine Sciences Fisheries
## Microbiology Microbiology
## Molecular Biology Molecular Biology
## Mycology Mycology
## Oceanography Oceanography
## Paleontology Paleontology
## Pest Control & Pesticides Pest Control & Pesticides
## Plant Pathology Plant Pathology
## Proteomics & Peptides Proteomics & Peptides
## Soil Sciences Soil Sciences
## Sustainable Development Sustainable Development
## Sustainable Energy Sustainable Energy
## Virology Virology
## Wood Science & Technology Wood Science & Technology
## Zoology Zoology
## Between Degree Closeness Eigen
## Agronomy Crop Science 31 4 0.0197457 9.262e-02
## Animal Behavior 44 5 0.0207355 2.733e-01
## Animal Husbandry 20 2 0.0199973 2.394e-02
## Atmospheric Sciences 4 3 0.0192430 5.047e-02
## Biochemistry 94 4 0.0203696 1.740e-04
## Biodiversity & Conservation Biology 91 7 0.0212137 8.997e-01
## Bioinformatics 157 2 0.0212056 3.976e-02
## Biophysics 0 1 0.0198502 1.820e-04
## Biotechnology 54 3 0.0198145 4.403e-02
## Birds 14 5 0.0212409 2.873e-01
## Botany 120 5 0.0214997 1.760e-01
## Cell Biology 13 3 0.0194075 3.097e-05
## Developmental Biology & Embryology 0 2 0.0189464 2.596e-05
## Ecology 58 9 0.0212404 1.000e+00
## Environmental & Geological Engineering 0 1 0.0192606 1.663e-02
## Environmental Sciences 201 8 0.0208383 1.297e-01
## Evolutionary Biology 204 8 0.0215895 6.384e-01
## Food Science & Technology 9 2 0.0194905 3.972e-03
## Forests & Forestry 138 4 0.0214681 2.022e-01
## Geochemistry & Mineralogy 0 1 0.0154899 1.141e-02
## Geology 69 3 0.0186679 3.232e-02
## Hydrology 18 2 0.0197379 1.315e-02
## Insects & Arthropods 49 5 0.0211432 2.114e-01
## Marine Sciences Fisheries 0 4 0.0204948 3.489e-01
## Microbiology 91 4 0.0201460 8.103e-03
## Molecular Biology 21 4 0.0200966 0.000e+00
## Mycology 0 1 0.0199363 6.063e-03
## Oceanography 112 4 0.0200162 1.858e-01
## Paleontology 0 1 0.0160128 9.253e-03
## Pest Control & Pesticides 0 3 0.0190735 1.161e-01
## Plant Pathology 37 5 0.0207897 8.822e-02
## Proteomics & Peptides 156 4 0.0208059 2.919e-03
## Soil Sciences 124 4 0.0211094 6.460e-02
## Sustainable Development 58 2 0.0207951 7.599e-02
## Sustainable Energy 66 2 0.0204219 1.374e-02
## Virology 0 1 0.0187863 8.379e-04
## Wood Science & Technology 0 0 0.0007112 1.361e-02
## Zoology 0 6 0.0208041 5.532e-01
##
## $`2011`
## Class
## Agronomy Crop Science Agronomy Crop Science
## Animal Behavior Animal Behavior
## Animal Husbandry Animal Husbandry
## Atmospheric Sciences Atmospheric Sciences
## Biochemistry Biochemistry
## Biodiversity & Conservation Biology Biodiversity & Conservation Biology
## Bioinformatics Bioinformatics
## Biophysics Biophysics
## Biotechnology Biotechnology
## Birds Birds
## Botany Botany
## Cell Biology Cell Biology
## Developmental Biology & Embryology Developmental Biology & Embryology
## Ecology Ecology
## Environmental & Geological Engineering Environmental & Geological Engineering
## Environmental Sciences Environmental Sciences
## Evolutionary Biology Evolutionary Biology
## Food Science & Technology Food Science & Technology
## Forests & Forestry Forests & Forestry
## Geochemistry & Mineralogy Geochemistry & Mineralogy
## Geology Geology
## Hydrology Hydrology
## Insects & Arthropods Insects & Arthropods
## Marine Sciences Fisheries Marine Sciences Fisheries
## Microbiology Microbiology
## Molecular Biology Molecular Biology
## Mycology Mycology
## Oceanography Oceanography
## Paleontology Paleontology
## Pest Control & Pesticides Pest Control & Pesticides
## Plant Pathology Plant Pathology
## Proteomics & Peptides Proteomics & Peptides
## Soil Sciences Soil Sciences
## Sustainable Development Sustainable Development
## Sustainable Energy Sustainable Energy
## Virology Virology
## Wood Science & Technology Wood Science & Technology
## Zoology Zoology
## Between Degree Closeness Eigen
## Agronomy Crop Science 42 5 0.0032114 0.1011057
## Animal Behavior 27 5 0.0032210 0.1726622
## Animal Husbandry 6 2 0.0032089 0.0157589
## Atmospheric Sciences 4 3 0.0031941 0.1455204
## Biochemistry 6 3 0.0008217 0.0000000
## Biodiversity & Conservation Biology 129 9 0.0032337 0.9570650
## Bioinformatics 0 0 0.0007112 0.0124319
## Biophysics 0 1 0.0008214 0.0002939
## Biotechnology 2 3 0.0031938 0.1592360
## Birds 1 3 0.0032198 0.1597218
## Botany 149 7 0.0032331 0.2494717
## Cell Biology 6 3 0.0008217 0.0000000
## Developmental Biology & Embryology 4 2 0.0008215 0.0000000
## Ecology 39 9 0.0032302 1.0000000
## Environmental & Geological Engineering 0 1 0.0032143 0.0278190
## Environmental Sciences 138 10 0.0032306 0.4248704
## Evolutionary Biology 11 5 0.0032228 0.4507029
## Food Science & Technology 15 3 0.0032029 0.0129495
## Forests & Forestry 24 5 0.0032277 0.2314706
## Geochemistry & Mineralogy 0 1 0.0030915 0.0163233
## Geology 55 3 0.0031942 0.0379424
## Hydrology 11 2 0.0032019 0.0325017
## Insects & Arthropods 0 2 0.0031952 0.0507561
## Marine Sciences Fisheries 0 4 0.0032103 0.4793821
## Microbiology 38 4 0.0032000 0.0331862
## Molecular Biology 0 3 0.0008212 0.0000000
## Mycology 42 2 0.0032097 0.0065549
## Oceanography 91 4 0.0032157 0.2827631
## Paleontology 0 1 0.0031385 0.0088195
## Pest Control & Pesticides 0 3 0.0031960 0.0416365
## Plant Pathology 60 4 0.0032201 0.0643753
## Proteomics & Peptides 4 2 0.0008216 0.0002779
## Soil Sciences 39 5 0.0032283 0.1147891
## Sustainable Development 19 4 0.0032228 0.1562710
## Sustainable Energy 20 3 0.0032184 0.0666030
## Virology 0 1 0.0031707 0.0040824
## Wood Science & Technology 0 0 0.0007112 0.0124319
## Zoology 29 5 0.0032170 0.3735696
# Long format: one row per class, metric and time window (L1 = window label).
yearstats <- melt(yearstats)

# Each metric over time, one line per class, one panel per metric.
ggplot(yearstats, aes(x = L1, y = value, col = Class)) +
  geom_point() +
  geom_line(aes(group = Class)) +
  facet_wrap(~variable, scales = "free", ncol = 1)
# Class-by-class niche overlap computed separately for each time window.
yeardat <- lapply(yearcompare, function(period) {
  # Author x Class count table for this window.
  counts <- droplevels(
    as.data.frame.array(table(period$Author, period$Class))
  )

  # Drop authors (rows) and classes (columns) with no records in this window.
  counts <- counts[rowSums(counts) > 0, colSums(counts) > 0]

  # Overlap between disciplines = 1 - Horn dissimilarity.
  overlap <- 1 - as.matrix(vegdist(t(counts), "horn"))

  # Keep only the strict lower triangle so each pair appears once.
  diag(overlap) <- NA
  overlap[upper.tri(overlap)] <- NA
  overlap
})

# Long format: one row per class pair and window.
yeardat <- melt(yeardat)
names(yeardat) <- c("To", "From", "Niche.Overlap", "Year")
Remove extremely weak connections
# Work with class names as plain character strings.
yeardat[c("To", "From")] <- lapply(yeardat[c("To", "From")], as.character)

# Keep only pairs with a non-missing overlap above 0.05.
exdat <- yeardat %>%
  group_by(To, From) %>%
  filter(!is.na(Niche.Overlap), Niche.Overlap > 0.05)

# Identifier for each class pair.
exdat$Combo <- paste(exdat$To, exdat$From, sep = "-")

# Overlap through time, one panel per "To" class and one line per pair.
# Each line is labelled at the middle break, m[length(m)/2].
ggplot(exdat, aes(x = Year, y = Niche.Overlap, col = From)) +
  geom_point() +
  geom_line(aes(group = Combo)) +
  facet_wrap(~To, scales = "free", ncol = 3) +
  theme_bw() +
  geom_text(
    data = exdat[exdat$Year == m[length(m) / 2], ],
    aes(label = From),
    size = 4
  )
ggsave("Figures/LinkTime.svg", dpi = 300)
ggsave("Figures/LinkTime.jpeg", height = 10, width = 14, dpi = 300)
It's not clear to me whether I need to use a time-series model. The connection at time A should be independent of the connection at time A+1. While they may be related due to the trend, there is no mechanistic connection among publications between years. It's not like a population, where the number of available producers directly influences the offspring in the next year. For the moment, I'm just using a linear model with year as a continuous variable. This probably needs to change.
As a first pass, just fit a straight line to each connection and determine whether its slope is positive or negative.
# Fit a linear trend (Niche.Overlap ~ Year) to every class pair and collect
# the slope term of each fit in `tmod`.

# Identifier for each class pair.
exdat$Combo <- paste(exdat$To, exdat$From, sep = "-")

# Year as a number, counted from 1995. Going through as.character() keeps
# this correct even if Year is a factor, where as.numeric() alone would
# return the level codes.
# NOTE: this shifts Year in place, so re-running the chunk shifts it again.
exdat$Year <- as.numeric(as.character(exdat$Year)) - 1995

# One data frame per class pair.
sdat <- split(exdat, exdat$Combo)

# Keep pairs observed in more than 2 time windows (at least 3 points).
sdat <- sdat[vapply(sdat, nrow, integer(1)) > 2]

# Fit the model to each pair and stack the "Year" coefficient rows.
# bind_rows() replaces rbind_all(), which is defunct in dplyr.
tmod <- bind_rows(lapply(sdat, function(df) {
  tdat <- tidy(lm(Niche.Overlap ~ Year, data = df))
  tdat <- tdat[tdat$term == "Year", ]
  # Every row of df belongs to the same pair, so the first row identifies it.
  # Taking To/From from the data avoids re-splitting Combo on "-", which
  # breaks for a class name that itself contains a hyphen.
  tdat$Combo <- df$Combo[1]
  tdat$To <- df$To[1]
  tdat$From <- df$From[1]
  tdat
}))
Visualize effect over time
Positive values are significantly increasing connections; negative values are significantly decreasing connections.
# Standardized slope, stored as a plain numeric vector. scale() returns a
# one-column matrix, hence as.numeric().
tmod$svalue <- as.numeric(scale(tmod$estimate))

# Heat map of the significant (p < 0.05) slopes for each class pair.
# - which() drops rows whose p-value is NA instead of turning them into
#   all-NA rows.
# - The colour limits are symmetric around zero and based on the largest
#   absolute slope. The previous c(-max, max) clipped strongly negative
#   slopes and inverted the limits when every slope was negative.
ggplot(
  tmod[which(tmod$p.value < 0.05 & tmod$term == "Year"), ],
  aes(x = To, y = From, fill = estimate)
) +
  geom_tile() +
  theme_bw() +
  scale_fill_gradientn(
    colours = c("blue", "gray", "red"),
    limits = c(-1, 1) * max(abs(tmod$estimate), na.rm = TRUE)
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(x = "", y = "")
ggsave("InteractionsTime.svg", dpi = 300)
ggsave("InteractionsTime.jpeg", height = 7, width = 7)
The wonderful D3 package connections!
# Edge list for the interactive D3 network, built from the overall overlap
# matrix `topics`.
# NOTE(review): if `topics` is still the full symmetric matrix with a unit
# diagonal, this edge list contains self-links and each pair twice -- confirm
# that is intended.
MisLinks <- melt(topics)

# Remove very weak connections, then rescale the remaining weights.
MisLinks <- MisLinks[MisLinks$value > 0.05, ]
MisLinks$value <- MisLinks$value * 10
names(MisLinks) <- c("To", "From", "value")
MisLinks$To <- as.character(MisLinks$To)
MisLinks$From <- as.character(MisLinks$From)

# Node table: every class that appears in a link, alphabetically sorted.
MisNodes <- data.frame(
  name = factor(sort(unique(c(MisLinks$To, MisLinks$From))))
)

# All nodes share a single group.
MisNodes$group <- 1L

# Zero-based node indices for each end of each link.
MisLinks$source <- match(MisLinks$To, MisNodes$name) - 1
MisLinks$target <- match(MisLinks$From, MisNodes$name) - 1

# Order by source
MisLinks <- MisLinks[order(MisLinks$source), ]

# Interactive views: inline, saved to HTML, and the d3Network force layout.
simpleNetwork(MisLinks, fontSize = 15)
simpleNetwork(MisLinks, height = 500, width = 700) %>%
  saveNetwork(file = 'Net1.html', selfcontained = FALSE)
d3Network::d3ForceNetwork(
  Links = MisLinks,
  Nodes = MisNodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  Group = "group",
  opacity = 0.9,
  file = "Net3.html"
)

# Persist the whole workspace for later sessions.
save.image("Analysis.RData")